The composted tannery sludge (CTS) dataset from Prof. Ademir Araujo from Federal University of Piauí. Sequence data was analyzed by Dr. Lucas William Mendes from University of Sao Paulo.


Treatment by composted tannery sludge (CTS) / CTS concentration:
Sampling time points:

initiate libraries

rm(list=ls())

library(tidyverse)
library(vegan)
library(ape)
library(RColorBrewer)
library(ggpubr)
library(ggpmisc)

mytheme <- theme_bw()+
    theme(panel.spacing = unit(0, "lines"),
          strip.background = element_blank(),
          strip.placement = "outside",
          legend.box.background = element_rect(),
          legend.box.margin = margin(1, 1, 1, 1),
          legend.position = "right", 
          panel.grid.major = element_blank(), 
          panel.grid.minor = element_blank())

load OTU table

# feature table
com <- read.csv("out_table_rarefied.csv", header = 1, row.names = 1, sep = ",") 
com <- t(com)
str(com)
##  int [1:74, 1:17872] 480 324 458 704 612 653 955 590 458 706 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
##   ..$ : chr [1:17872] "649b4c705852cc03df6784a773e7b8e7" "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" ...
knitr::kable(
  com[1:5, 1:2],
  caption = "how the OTU table looks"
)
how the OTU table looks
649b4c705852cc03df6784a773e7b8e7 b7c7e3f5733da18123882ca1001824cc
T1_0a 480 133
T1_0b 324 142
T1_0c 458 138
T1_150a 704 181
T1_150b 612 217
cat("the OTU/feature table has been rarefied to", rarefactiondepth <- mean(rowSums(com)))
## the OTU/feature table has been rarefied to 14500

The number of samples is: 74; the number of species/ASVs is: 17872; the range of sequence number among samples is: 1, 4.9133^{4}.

load metadata

metadata <- read.csv("metadata.csv", header = 1, row.names = 1, sep = ",")

dim(metadata)
## [1] 74  2
head(metadata)
##         Treat Day
## T1_0a      T1   0
## T1_0b      T1   0
## T1_0c      T1   0
## T1_150a    T1 150
## T1_150b    T1 150
## T1_150c    T1 150

load taxonomy

taxonomy <- read.csv("taxonomy_splited_columns.csv", header = 1, row.names = 1, sep = ",")

dim(taxonomy)
## [1] 20939     7
head(taxonomy)
##                                    Domain         Phylum               Class
## 649b4c705852cc03df6784a773e7b8e7 Bacteria     Firmicutes             Bacilli
## b7c7e3f5733da18123882ca1001824cc Bacteria     Firmicutes             Bacilli
## 87e24a4c32d23bd13e47081d40f9cd48 Bacteria    Chloroflexi              KD4-96
## 286519bf837f34d632593c879b356650 Bacteria Proteobacteria Alphaproteobacteria
## a3805d6b05f8f5f2115b4467222a9186 Bacteria     Firmicutes             Bacilli
## 27af035245a17ed534476e8517ed27b4 Bacteria     Firmicutes             Bacilli
##                                        Order           Family        Genus
## 649b4c705852cc03df6784a773e7b8e7  Bacillales                              
## b7c7e3f5733da18123882ca1001824cc  Bacillales      Bacillaceae     Bacillus
## 87e24a4c32d23bd13e47081d40f9cd48                                          
## 286519bf837f34d632593c879b356650 Rhizobiales Beijerinckiaceae   Microvirga
## a3805d6b05f8f5f2115b4467222a9186  Bacillales      Bacillaceae     Bacillus
## 27af035245a17ed534476e8517ed27b4  Bacillales Paenibacillaceae Ammoniphilus
##                                                Species
## 649b4c705852cc03df6784a773e7b8e7                      
## b7c7e3f5733da18123882ca1001824cc                      
## 87e24a4c32d23bd13e47081d40f9cd48                      
## 286519bf837f34d632593c879b356650                      
## a3805d6b05f8f5f2115b4467222a9186                      
## 27af035245a17ed534476e8517ed27b4 _uncultured bacterium

using sample specific cutoffs to define the rare and common biospheres

For more details about ‘sample specific cutoffs’ please check Jia, X., Dini-Andreote, F. & Salles, J.F. Unravelling the interplay of ecological processes structuring the bacterial rare biosphere. ISME COMMUN. 2, 96 (2022). https://doi.org/10.1038/s43705-022-00177-6

# source the TruncateTable FUNCTION
source("TruncateTable.r")

# The truncated datasets can be stored as follows: 
dominant <- TruncateTable(com, typem = "dominant") 
str(dominant)
##  num [1:74, 1:471] 480 324 458 704 612 653 955 590 458 706 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
##   ..$ : chr [1:471] "649b4c705852cc03df6784a773e7b8e7" "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" ...
# write.csv(t(dominant), "OTU_table_dominant.csv")

rare <-TruncateTable(com, typem = "rare") 
str(rare)
##  num [1:74, 1:17858] 0 0 0 0 0 0 0 0 0 0 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:74] "T1_0a" "T1_0b" "T1_0c" "T1_150a" ...
##   ..$ : chr [1:17858] "b7c7e3f5733da18123882ca1001824cc" "87e24a4c32d23bd13e47081d40f9cd48" "286519bf837f34d632593c879b356650" "a3805d6b05f8f5f2115b4467222a9186" ...
# write.csv(t(rare),  "OTU_table_rare.csv")


# Combine rare and dominant biosphere
row.names(dominant) <- paste("dominant", row.names(dominant), sep = "_")
dominant <- as.data.frame(t(dominant))

row.names(rare) <- paste("rare", row.names(rare), sep = "_")
rare <- as.data.frame(t(rare))

# combine rare & dominant biospheres
com_rare_dominant <- transform(merge(dominant, rare, by = "row.names", all = TRUE), row.names = Row.names, Row.names = NULL)  
com_rare_dominant[is.na(com_rare_dominant)] <- 0
dim(com_rare_dominant)
## [1] 17872   148

total relative abundance of the rare and dominant biosphere

com_rare_dominant[1:2, 1:3]
##                                  dominant_T1_0a dominant_T1_0b dominant_T1_0c
## 000dd624e20d639541481236d8e7569f              0              0              0
## 00107421598fe0eeae292ce70483b4b5              0              0              0
df <- as.data.frame(colSums(com_rare_dominant))
df$ab <- df[, 1]
                 
df <- df %>% 
  rownames_to_column(var = "col") %>% 
  separate(col, into = c("Biosphere", "Treat", "Day"), sep = "_") %>% 
  mutate(Day = gsub('.{1}$', '', Day)) %>% 
  select(-4)

# calculate relative abundance
df$ab <- df$ab*100/rarefactiondepth

# change Treat info
df1 <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))

df1$Day <- as.numeric(as.character(df1$Day))

head(df1)
##   Biosphere        Treat Day       ab         Biospheres
## 1  dominant 0 ton CTS/ha   0 27.64828 Dominant biosphere
## 2  dominant 0 ton CTS/ha   0 21.67586 Dominant biosphere
## 3  dominant 0 ton CTS/ha   0 22.82069 Dominant biosphere
## 4  dominant 0 ton CTS/ha 150 30.54483 Dominant biosphere
## 5  dominant 0 ton CTS/ha 150 23.51034 Dominant biosphere
## 6  dominant 0 ton CTS/ha 150 29.15172 Dominant biosphere
# line plot
(p <- ggplot(df1, aes(x = Day, y = ab)) + 
    geom_smooth(method = lm, se = TRUE, size = .6, aes(color = Treat)) +
    geom_point(aes(color = Treat), size = 3, alpha = .8) + 
    scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
    facet_grid(Biospheres ~ Treat, scales = "free_y") +
    stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
    scale_x_continuous(breaks = c(0, 45, 75, 150, 180)) +
    labs(title = "", x = "Day", y = "Relative abundance (%)") +
    mytheme +
    theme(text = element_text(size = 15)))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## `geom_smooth()` using formula = 'y ~ x'

# ggsave("Total_relative_abundance_rare_dominant_biospheres.pdf", width = 20, height = 8, units = "cm", p, scale = 1.7)
# ggsave("Total_relative_abundance_rare_dominant_biospheres.jpg", width = 20, height = 8, units = "cm", p, scale = 1.7)

# change Treat info
df2 <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0", "2.5", "5", "10", "20"))) %>%
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))

df2$Treat <- as.numeric(as.character(df2$Treat))

head(df2)
##   Biosphere Treat Day       ab         Biospheres
## 1  dominant     0   0 27.64828 Dominant biosphere
## 2  dominant     0   0 21.67586 Dominant biosphere
## 3  dominant     0   0 22.82069 Dominant biosphere
## 4  dominant     0 150 30.54483 Dominant biosphere
## 5  dominant     0 150 23.51034 Dominant biosphere
## 6  dominant     0 150 29.15172 Dominant biosphere
# line plot
(p <- ggplot(df2, aes(x = Treat, y = ab)) + 
    geom_smooth(method = lm, se = TRUE, size = .6, aes(color = factor(Treat))) +
    geom_point(aes(color = factor(Treat)), size = 3, alpha = .8) + 
    scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
    stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
    scale_x_continuous(breaks = c(0, 2.5, 5, 10, 20)) +
    facet_grid(Biospheres ~ Day, scales = "free_y") +
    labs(title = "", x = "Treatment", y = "Relative abundance (%)") +
    mytheme +
    theme(text = element_text(size = 15)))
## `geom_smooth()` using formula = 'y ~ x'

# ggsave("Total_relative_abundance_rare_dominant_biospheres_treat.pdf", width = 20, height = 8, units = "cm", p, scale = 1.7)
# ggsave("Total_relative_abundance_rare_dominant_biospheres_treat.jpg", width = 20, height = 8, units = "cm", p, scale = 1.7)

number of OTUs in the rare and dominant biosphere

richness <- specnumber(t(com_rare_dominant))
df <- data.frame(richness)

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)


# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
                 Treat = as.factor(group_info[,2]),
                 Day = as.factor(group_info[,3]),
                 df)

#  get average per group
df1 <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>%
  group_by(Biospheres, Treat, Day) %>% 
  summarise(avg = mean(richness, na.rm=T)) %>% 
  arrange(Day, rev(Biospheres)) %>% 
  group_by(Day, Treat) %>% 
  mutate(label_y = cumsum(avg) - 0.5 * avg)
## `summarise()` has grouped output by 'Biospheres', 'Treat'. You can override
## using the `.groups` argument.
df1$avg <- as.integer(df1$avg)


# stacked-bar plot
(p <- ggplot(df1, aes(x = Day, y = avg, fill = Biospheres)) + 
    geom_col(color = "black", width = 0.8, lwd = 0.1) +
    scale_y_continuous(expand = c(0, 8)) +
    facet_grid(. ~ Treat) +
    labs(x = "Days", y = "Richness (NO. of OTUs)", title = "") +
    geom_text(aes(y = label_y, label = avg),  colour = "white", size = 2) +
    scale_fill_brewer(palette = "Pastel1") +
    mytheme)

# ggsave("Richness_stacked_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)

# line plot
df2 <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biospheres = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))

pd <- position_dodge(0.2)  

(p <- ggplot(df2, aes(x = Day, y = richness, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_wrap(.~Biospheres, scales = "free_y") +
    labs(title = "", x = "Day", y = "Richness (NO. of OTUs)") +
    mytheme)

# ggsave("Richness_lineplot_freey_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)

(p <- ggplot(df2, aes(x = Day, y = richness, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_grid(.~Biospheres) +
    labs(title = "", x = "Day", y = "Richness (NO. of OTUs)") +
    mytheme)

# ggsave("Richness_lineplot_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1_optional.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_1_optional.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)

beta-diversity

# dominant biosphere
dist <- vegdist(t(dominant), method = "bray", binary = FALSE, diag = 1) 

re <- pcoa(dist, correction = "none", rn = NULL)
df <- re$vectors[, 1:2]

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
                 Treat = as.factor(group_info[,2]),
                 Day = as.factor(group_info[,3]),
                 df)

df <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) 

# two way permanova
set.seed(123)
result <- adonis(dist ~ Treat*Day, data = df,  permutation = 9999) 
## 'adonis' will be deprecated: use 'adonis2' instead
result$aov.tab
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Treat      4    1.4377 0.35943  2.8552 0.11571 0.0001 ***
## Day        4    2.7843 0.69608  5.5294 0.22409 0.0001 ***
## Treat:Day 16    2.0344 0.12715  1.0101 0.16374 0.4378    
## Residuals 49    6.1684 0.12589         0.49646           
## Total     73   12.4249                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# write.csv(result$aov.tab, "PERMANOVA_dominant.csv")


(p1 <- ggplot(df, aes(Axis.1, Axis.2, shape = Treat, color = Day)) +
    geom_point(size = 3, alpha = 0.7) + 
    labs(x = paste("PCoA1 (", round(re$values$Relative_eig[1] * 100, 2), "%)", sep = ""), 
         y = paste("PCoA2 (", round(re$values$Relative_eig[2] * 100, 2), "%)", sep = ""), 
         title = "Dominant biosphere") +
    scale_color_brewer(palette="Dark2") +
    scale_shape_manual(values = c(1, 0, 15, 16, 17), name = "CTS concentration") +
    theme_bw())


# rare biosphere
dist <- vegdist(t(rare), method = "bray", binary = FALSE, diag = 1) 

re <- pcoa(dist, correction = "none", rn = NULL)
df <- re$vectors[, 1:2]

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df <- data.frame(Biosphere = as.factor(group_info[,1]),
                 Treat = as.factor(group_info[,2]),
                 Day = as.factor(group_info[,3]),
                 df)

df <- df %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) 

# two way permanova
set.seed(123)
result <- adonis(dist ~ Treat*Day, data = df,  permutation = 9999) 
## 'adonis' will be deprecated: use 'adonis2' instead
result$aov.tab
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Treat      4    3.0062 0.75155  3.3437 0.15043 0.0001 ***
## Day        4    2.3690 0.59224  2.6349 0.11854 0.0001 ***
## Treat:Day 16    3.5957 0.22473  0.9998 0.17992 0.4774    
## Residuals 49   11.0135 0.22477         0.55111           
## Total     73   19.9843                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# write.csv(result$aov.tab, "PERMANOVA_rare.csv")

(p2 <- ggplot(df, aes(Axis.1, Axis.2, shape = Treat, color = Day)) +
    geom_point(size = 3, alpha = 0.7) + 
    labs(x = paste("PCoA1 (", round(re$values$Relative_eig[1] * 100, 2), "%)", sep = ""), 
         y = paste("PCoA2 (", round(re$values$Relative_eig[2] * 100, 2), "%)", sep = ""), 
         title = "Rare biosphere") +
    scale_color_brewer(palette="Dark2") +
    scale_shape_manual(values = c(1, 0, 15, 16, 17), name = "CTS concentration") +
    theme_bw())


p <- ggarrange(p1, p2, labels = c("A", "B"), common.legend = TRUE, legend = "right", ncol = 2)

# ggsave("PCoA_rare_dominant.pdf", width = 16, height = 7.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_2.pdf", width = 16, height = 7.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_2.jpg", width = 16, height = 7.5, units = "cm", p, scale = 1.5)

Phyla composition

# species composition of the rare and dominant biosphere at Phyla level (relative abundance) 

# add taxonomy info
df <- transform(merge(taxonomy[, -c(1, 4:7)], com_rare_dominant, by = "row.names"), row.names = Row.names, Row.names = NULL)  

df <- df %>% select(-Class)


warning("double check is the table rarified or not?")
## Warning: double check is the table rarified or not?
(rarefactiondepth <- mean(rowSums(com)))
## [1] 14500
# combine OTUs in the same phylum
df <- aggregate(df[,-1], list(df$Phylum), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- 100*df/rarefactiondepth


df[1:2, 1:4]
##                dominant_T1_0a dominant_T1_0b dominant_T1_0c dominant_T1_150a
## Acidobacteria        8.068966       6.524138       5.786207         1.710345
## Actinobacteria       1.165517       4.634483       1.979310        11.655172
row.names(df)
##  [1] "Acidobacteria"       "Actinobacteria"      "Armatimonadetes"    
##  [4] "Bacteroidetes"       "BRC1"                "Chlamydiae"         
##  [7] "Chloroflexi"         "Cyanobacteria"       "Dadabacteria"       
## [10] "Deinococcus-Thermus" "Dependentiae"        "Elusimicrobia"      
## [13] "Entotheonellaeota"   "Epsilonbacteraeota"  "FBP"                
## [16] "FCPU426"             "Fibrobacteres"       "Firmicutes"         
## [19] "Fusobacteria"        "GAL15"               "Gemmatimonadetes"   
## [22] "Halanaerobiaeota"    "Hydrogenedentes"     "Kiritimatiellaeota" 
## [25] "Latescibacteria"     "Nitrospirae"         "Omnitrophicaeota"   
## [28] "Patescibacteria"     "Planctomycetes"      "Proteobacteria"     
## [31] "Rokubacteria"        "Spirochaetes"        "Tenericutes"        
## [34] "Verrucomicrobia"     "WPS-2"               "WS2"

all phyla

# transpose the dataframe
df1 <- t(df)
df1 <- as.data.frame(df1)

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df1), t(as.data.frame(strsplit(as.character(row.names(df1)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df1.1 <- data.frame(Biosphere = as.factor(group_info[,1]),
                  Treat = as.factor(group_info[,2]),
                  Day = as.factor(group_info[,3]),
                  df1)

#  pivot data frame from wide to long
df1.2 <- df1.1 %>%
  pivot_longer(cols = 4:ncol(df1.1), 
               names_to = "Phyla", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere")))

head(df1.2)
## # A tibble: 6 × 5
##   Biosphere          Treat        Day   Phyla           value
##   <fct>              <fct>        <fct> <chr>           <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0     Acidobacteria   8.07 
## 2 Dominant biosphere 0 ton CTS/ha 0     Actinobacteria  1.17 
## 3 Dominant biosphere 0 ton CTS/ha 0     Armatimonadetes 0    
## 4 Dominant biosphere 0 ton CTS/ha 0     Bacteroidetes   0.621
## 5 Dominant biosphere 0 ton CTS/ha 0     BRC1            0    
## 6 Dominant biosphere 0 ton CTS/ha 0     Chlamydiae      0
# write_csv(df1.2, "phyla_composition_all_rare_dominnat.csv")

#  pivot data frame from wide to long
df1.1 <- df1.1 %>%
  pivot_longer(cols = 4:ncol(df1.1), 
               names_to = "Phyla", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>% 
  group_by(Biosphere, Treat, Day, Phyla) %>% 
  summarise(avg = mean(value, na.rm=T)) 
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
head(df1.1)
## # A tibble: 6 × 5
## # Groups:   Biosphere, Treat, Day [1]
##   Biosphere          Treat        Day   Phyla             avg
##   <fct>              <fct>        <fct> <chr>           <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0     Acidobacteria   6.79 
## 2 Dominant biosphere 0 ton CTS/ha 0     Actinobacteria  2.59 
## 3 Dominant biosphere 0 ton CTS/ha 0     Armatimonadetes 0    
## 4 Dominant biosphere 0 ton CTS/ha 0     Bacteroidetes   0.329
## 5 Dominant biosphere 0 ton CTS/ha 0     BRC1            0    
## 6 Dominant biosphere 0 ton CTS/ha 0     Chlamydiae      0
set.seed(112358)
(colourCount = length(colnames(df1)))
## [1] 36
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col <- sample(col_vector, colourCount)
# pie(rep(1,colourCount), col=sample(col_vector, colourCount))

# stacked-bar plot
(p <- ggplot(df1.1, aes(x = Day, y = avg, fill = Phyla)) + 
    geom_bar(stat = "identity", width = 0.8) +
    scale_y_continuous(expand = c(0, 0)) +
    scale_fill_manual(values = col) +
    facet_grid(Biosphere ~ Treat) +
    labs(x = "Days", y = "Relative abundance (%)", title = "") +
    guides(fill = guide_legend(title="Phyla", reverse = TRUE)) +
    mytheme)

ggsave("phyla_stacked_barplots_all_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)

only keep phyla with average relative abundance > 0.1%

# only keep phyla with average relative abundance > 0.1%
df2 <- df
df2$mean <- rowMeans(df2)
df2 <- subset(df2, mean > 0.1)
df2 <- df2[with(df2, order(mean)), ]
df2$mean <- NULL

# phyla that abundance more than 0.1%
row.names(df2)
##  [1] "Dependentiae"     "Armatimonadetes"  "Cyanobacteria"    "Nitrospirae"     
##  [5] "Patescibacteria"  "Rokubacteria"     "Bacteroidetes"    "Verrucomicrobia" 
##  [9] "Gemmatimonadetes" "Planctomycetes"   "Chloroflexi"      "Acidobacteria"   
## [13] "Firmicutes"       "Proteobacteria"   "Actinobacteria"
# transpose the dataframe
df2 <- t(df2)
df2 <- as.data.frame(df2)

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df2), t(as.data.frame(strsplit(as.character(row.names(df2)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df3 <- data.frame(Biosphere = as.factor(group_info[,1]),
                  Treat = as.factor(group_info[,2]),
                  Day = as.factor(group_info[,3]),
                  df2)

#  pivot data frame from wide to long
df3 <- df3 %>%
  pivot_longer(cols = 4:ncol(df3), 
               names_to = "Phyla", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Biosphere = factor(Biosphere, levels = c("dominant", "rare"), labels = c("Dominant biosphere", "Rare biosphere"))) %>% 
  group_by(Biosphere, Treat, Day, Phyla) %>% 
  summarise(avg = mean(value, na.rm=T)) #  summarise_at(vars("PM25", "Ozone", "CO2"), mean); or summarise(across(PM25:CO2, mean))
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
head(df3)
## # A tibble: 6 × 5
## # Groups:   Biosphere, Treat, Day [1]
##   Biosphere          Treat        Day   Phyla             avg
##   <fct>              <fct>        <fct> <chr>           <dbl>
## 1 Dominant biosphere 0 ton CTS/ha 0     Acidobacteria   6.79 
## 2 Dominant biosphere 0 ton CTS/ha 0     Actinobacteria  2.59 
## 3 Dominant biosphere 0 ton CTS/ha 0     Armatimonadetes 0    
## 4 Dominant biosphere 0 ton CTS/ha 0     Bacteroidetes   0.329
## 5 Dominant biosphere 0 ton CTS/ha 0     Chloroflexi     1.20 
## 6 Dominant biosphere 0 ton CTS/ha 0     Cyanobacteria   0
set.seed(112358)
(colourCount = length(colnames(df2)))
## [1] 15
qual_col_pals <- brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector <- unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
col <- sample(col_vector, colourCount)
# pie(rep(1,colourCount), col=sample(col_vector, colourCount))

# stacked-bar plot
(p <- ggplot(df3, aes(x = Day, y = avg, fill = Phyla)) + 
    geom_bar(stat = "identity", width = 0.8) +
    scale_y_continuous(expand = c(0, 0)) +
    scale_fill_manual(values = col) +
    facet_grid(Biosphere ~ Treat) +
    labs(x = "Days", y = "Relative abundance (%)", title = "") +
    guides(fill = guide_legend(title="Phyla", reverse = TRUE)) +
    mytheme)

# ggsave("phyla_stacked_barplots_more_than_0.1_rare_dominant.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.pdf", width = 14, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_3.jpg", width = 14, height = 8.5, units = "cm", p, scale = 1.5)

define different types of rarity and commonness

first find the most types (except transiently rare)

# find all taxa belongs to the rare biosphere
taxa_rare <- row.names(rare)
taxa_dominant <- row.names(dominant)

# create a data.frame that
df_types <- data.frame(OTU_ID = colnames(com), 
                 Types = NA)

conditionally_rare_dominant <- taxa_dominant[taxa_dominant %in% taxa_rare]
permanently_dominant <- taxa_dominant[!taxa_dominant %in% taxa_rare]
only_rare <- taxa_rare[!taxa_rare %in% taxa_dominant]

cat("the number of OTUs in the dominant biosphere is", length(taxa_dominant),
    "\nthe number of OTUs in the rare biosphere is", length(taxa_rare),
    "\nthe number of conditionally rare/dominant OTUs is", length(conditionally_rare_dominant),
    "\nthe number of OTUs that only apears in the dominant biosphere is", length(permanently_dominant),
    "\nthe number of rare OTUs that not appear in the dominant biosphere is", length(only_rare))
## the number of OTUs in the dominant biosphere is 471 
## the number of OTUs in the rare biosphere is 17858 
## the number of conditionally rare/dominant OTUs is 457 
## the number of OTUs that only apears in the dominant biosphere is 14 
## the number of rare OTUs that not appear in the dominant biosphere is 17401
df_types$Types[df_types$OTU_ID %in% conditionally_rare_dominant] <- "conditionally_rare_dominant"
df_types$Types[df_types$OTU_ID %in% only_rare] <- "only_rare"
df_types$Types[df_types$OTU_ID %in% permanently_dominant] <- "permanently_dominant"

df_types <- df_types %>%
  # mutate(Types = factor(Types)) %>% 
  mutate(OTU_ID = as.character(OTU_ID))

head(df_types)
##                             OTU_ID                       Types
## 1 649b4c705852cc03df6784a773e7b8e7        permanently_dominant
## 2 b7c7e3f5733da18123882ca1001824cc conditionally_rare_dominant
## 3 87e24a4c32d23bd13e47081d40f9cd48 conditionally_rare_dominant
## 4 286519bf837f34d632593c879b356650 conditionally_rare_dominant
## 5 a3805d6b05f8f5f2115b4467222a9186 conditionally_rare_dominant
## 6 27af035245a17ed534476e8517ed27b4 conditionally_rare_dominant
table(df_types$Types)
## 
## conditionally_rare_dominant                   only_rare 
##                         457                       17401 
##        permanently_dominant 
##                          14
# write.csv(df_types,  "OTU_ID_and_types.csv")

different types of rarity and dominance in a facet plot

com_classified <- com_rare_dominant %>% 
  rownames_to_column(var = "OTU_ID") 

com_classified <- left_join(df_types, com_classified, by = "OTU_ID") %>% 
  column_to_rownames(var = "OTU_ID")


# last step - define the "transiently rare"
com_classified$Types[which(rowSums(com_classified[, -1] != 0) == 1 & com_classified$Types != "conditionally_rare_dominant "& com_classified$Types != "permanently_dominant") ] <- "transiently_rare"

com_classified[1:2, 1:3]
##                                                        Types dominant_T1_0a
## 649b4c705852cc03df6784a773e7b8e7        permanently_dominant            480
## b7c7e3f5733da18123882ca1001824cc conditionally_rare_dominant            133
##                                  dominant_T1_0b
## 649b4c705852cc03df6784a773e7b8e7            324
## b7c7e3f5733da18123882ca1001824cc            142
df <- com_classified %>%
  group_by(Types) %>%
  summarise(across(everything(), list(sum))) %>% 
  column_to_rownames(var = "Types")

df <- as.data.frame(t(df))

# calculate relative abundance
df <- 100*df/rarefactiondepth

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
                  Treat = as.factor(group_info[,2]),
                  Day = as.factor(group_info[,3]),
                  df)

#  pivot data frame from wide to long
df2 <- df1 %>%
  pivot_longer(cols = 4:ncol(df1), 
               names_to = "Types", 
               values_to = "value") 

df2$Types[df2$Types == "conditionally_rare_dominant" & df2$Biosphere == "dominant"] <- "conditionally_dominant"
df2$Types[df2$Types == "conditionally_rare_dominant" & df2$Biosphere == "rare"] <- "conditionally_rare"

df2 <- df2 %>% 
  select(-Biosphere) %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Types = factor(Types, levels = c("conditionally_dominant", "permanently_dominant", "only_rare", "conditionally_rare", "transiently_rare"), labels = c("Conditionally dominant", "Permanently dominant", "Permanently rare", "Conditionally rare", "Transiently rare"))) %>% 
  filter(value > 0)

df2$Day <- as.numeric(as.character(df2$Day))

head(df2)
## # A tibble: 6 × 4
##   Treat          Day Types                  value
##   <fct>        <dbl> <fct>                  <dbl>
## 1 0 ton CTS/ha     0 Conditionally dominant 21.2 
## 2 0 ton CTS/ha     0 Permanently dominant    6.44
## 3 0 ton CTS/ha     0 Conditionally dominant 18.0 
## 4 0 ton CTS/ha     0 Permanently dominant    3.68
## 5 0 ton CTS/ha     0 Conditionally dominant 19.7 
## 6 0 ton CTS/ha     0 Permanently dominant    3.16
str(df2)
## tibble [370 × 4] (S3: tbl_df/tbl/data.frame)
##  $ Treat: Factor w/ 5 levels "0 ton CTS/ha",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Day  : num [1:370] 0 0 0 0 0 0 150 150 150 150 ...
##  $ Types: Factor w/ 5 levels "Conditionally dominant",..: 1 2 1 2 1 2 1 2 1 2 ...
##  $ value: num [1:370] 21.21 6.44 17.99 3.68 19.66 ...
# line plot
(p <- ggplot(df2, aes(x = Day, y = value)) + 
    geom_smooth(method = lm, se = TRUE, size = .6, aes(color = Treat)) +
    geom_point(aes(color = Treat), size = 3, alpha = .8) + 
    scale_color_brewer(palette = "PuBu", name = "CTS concentration") +
    facet_grid(Types~Treat, scales="free_y") +
    stat_poly_eq(mapping = use_label(c("R2", "P")), p.digits = 2, label.y = "bottom", label.x = "left") +
    scale_x_continuous(breaks=c(0, 45, 75, 150, 180)) +
    labs(title = "", x = "Day", y = "Relative abundance (%)") +
    mytheme +
    theme(text = element_text(size = 15)))
## `geom_smooth()` using formula = 'y ~ x'
## Warning in ci_f_ncp(stat, df1 = df1, df2 = df2, probs = probs): Upper limit
## outside search range. Set to the maximum of the parameter range.
## Warning: Computation failed in `stat_poly_eq()`
## Caused by error in `check_output()`:
## ! out[1] <= out[2] is not TRUE

# ggsave("Figure_5_new.pdf", width = 20, height = 15, units = "cm", p, scale = 1.7)
# ggsave("Figure_5_new.jpg", width = 20, height = 15, units = "cm", p, scale = 1.7)

dominant biosphere

# add rarify classification to the dominant OTU table
dominant_classified <- dominant %>% 
  rownames_to_column(var = "OTU_ID") 

dominant_classified <- left_join(dominant_classified, df_types, by = "OTU_ID") %>% 
  column_to_rownames(var = "OTU_ID")

# define the "transiently dominant"
# dominant_classified$Types[which(rowSums(dominant_classified[,-ncol(dominant_classified)] != 0) == 1)] = "transiently_dominant"
# conditionally_rare_dominant 
#                         236 
#        permanently_dominant 
#                           1 
#        transiently_dominant 
#                         234 

# write.csv(dominant_classified,  "OTU_table_dominant_with_types.csv")

# summarize sequence number per type
df <- aggregate(dominant_classified[,-ncol(dominant_classified)], list(dominant_classified$Types), sum)

row.names(df) <- df$Group.1
df <- df[, -1]
df <- as.data.frame(t(df))

# calculate relative abundance
df <- 100*df/rarefactiondepth

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
                  Treat = as.factor(group_info[,2]),
                  Day = as.factor(group_info[,3]),
                  df)

#  pivot data frame from wide to long
df2 <- df1 %>%
  pivot_longer(cols = 4:ncol(df1), 
               names_to = "Types", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Types = factor(Types, levels = c("conditionally_rare_dominant", "permanently_dominant"), labels = c("Conditionally dominant", "Permanently dominant"))) %>% 
  group_by(Biosphere, Treat, Day, Types) %>% 
  summarise(avg = mean(value, na.rm=T)) 
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
df2$avg <- as.integer(df2$avg)

my_palette = c(brewer.pal(8, "Pastel2")[c(5:6)])

# stacked-bar plot
(p1 <- ggplot(df2, aes(x = Day, y = avg, fill = Types)) + 
    geom_col(color = "black", width = 0.8, lwd = 0.1) +
    scale_y_continuous(expand = c(0, 0), limits=c(0, 80)) +
    scale_fill_manual(values = my_palette) +
    facet_grid(. ~ Treat) +
    labs(x = "Days", y = "Relative abundance (%)", title = "Dominant biosphere") +
    guides(fill = guide_legend(title="Types of dominance")) +
    mytheme+
    theme(legend.position = "bottom"))

# line plot

#  pivot data frame from wide to long
df3 <- df1 %>%
  pivot_longer(cols = 4:ncol(df1), 
               names_to = "Types", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Types = factor(Types, levels = c("conditionally_rare_dominant", "permanently_dominant"), labels = c("Conditionally dominant", "Permanently dominant")))

(f1 <- ggplot(df3, aes(x = Day, y = value, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.4, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 2, alpha = 0.9, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_grid(.~Types) +
    labs(title = "Dominant biosphere", x = "Day", y = "Relative abundance (%)") +
    mytheme)

rare biosphere

# add rarify classification to the rare OTU table
rare_classified <- rare %>% 
  rownames_to_column(var = "OTU_ID") 

rare_classified <- left_join(rare_classified, df_types, by = "OTU_ID") %>% 
  column_to_rownames(var = "OTU_ID")

# define the "transiently rare"
rare_classified$Types[which(rowSums(rare_classified[,-ncol(rare_classified)] != 0) == 1)] = "transiently_rare"

# write.csv(rare_classified,  "OTU_table_rare_with_types.csv")

# summarize sequence number per type
df <- aggregate(rare_classified[,-ncol(rare_classified)], list(rare_classified$Types), sum)

row.names(df) <- df$Group.1
df <- df[, -1]
df <- as.data.frame(t(df))

# calculate relative abundance
df <- 100*df/rarefactiondepth

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X3 <- gsub('.{1}$', '', group_info$X3)

# combine metadata to the dominant phyla table
df1 <- data.frame(Biosphere = as.factor(group_info[,1]),
                  Treat = as.factor(group_info[,2]),
                  Day = as.factor(group_info[,3]),
                  df)

#  pivot data frame from wide to long
df2 <- df1 %>%
  pivot_longer(cols = 4:ncol(df1), 
               names_to = "Types", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Types = factor(Types, levels = c("only_rare", "transiently_rare", "conditionally_rare_dominant"), labels = c("Permanently rare", "Transiently rare", "Conditionally rare"))) %>% 
  group_by(Biosphere, Treat, Day, Types) %>% 
  summarise(avg = mean(value, na.rm=T)) 
## `summarise()` has grouped output by 'Biosphere', 'Treat', 'Day'. You can
## override using the `.groups` argument.
df2$avg <- as.integer(df2$avg)

my_palette = c(brewer.pal(8, "Pastel1")[c(1:3)])

# stacked-bar plot
(p2 <- ggplot(df2, aes(x = Day, y = avg, fill = Types)) + 
    geom_col(color = "black", width = 0.8, lwd = 0.1) +
    scale_y_continuous(expand = c(0, 0), limits=c(0, 80)) +
    scale_fill_manual(values = my_palette) +
    facet_grid(. ~ Treat) +
    labs(x = "Days", y = "Relative abundance (%)", title = "Rare biosphere") +
    guides(fill = guide_legend(title="Types of rarity")) +
    mytheme +
    theme(legend.position = "bottom"))

df3 <- df1 %>%
  pivot_longer(cols = 4:ncol(df1), 
               names_to = "Types", 
               values_to = "value") %>% 
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) %>% 
  mutate(Types = factor(Types, levels = c("only_rare", "conditionally_rare_dominant", "transiently_rare"), labels = c("Permanently rare", "Conditionally rare", "Transiently rare")))

(f2 <- ggplot(df3, aes(x = Day, y = value, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.4, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 2, alpha = 0.9, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_grid(.~Types) +
    labs(title = "Rare biosphere", x = "Day", y = "Relative abundance (%)") +
    mytheme)

p <- ggarrange(p1, p2, labels = c("A", "B"), ncol = 2)

# ggsave("Types_of_rare_dominant_abundance_stacked_barplots.pdf", width = 24, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_5.pdf", width = 24, height = 8.5, units = "cm", p, scale = 1.5)
# ggsave("Figure_5.jpg", width = 24, height = 8.5, units = "cm", p, scale = 1.5)

f <- ggarrange(f1, f2, labels = c("A", "B"), widths = c(0.8, 1.2), common.legend = TRUE, legend = "right", ncol = 2)

# ggsave("Types_of_rare_dominant_abundance_lineplots_raw.pdf", width = 20, height = 5, units = "cm", f, scale = 1.5)
# ggsave("Figure_6.pdf", width = 20, height = 5, units = "cm", f, scale = 1.5)
# ggsave("Figure_6.jpg", width = 20, height = 5, units = "cm", f, scale = 1.5)

the abundance and taxonomy composition of conditionally rare/dominant

df <- as.data.frame(t(com))

df <- df %>% 
  rownames_to_column(var = "OTU_ID") 

df <- left_join(df_types, df, by = "OTU_ID") %>% 
  column_to_rownames(var = "OTU_ID") %>% 
  filter(Types == "conditionally_rare_dominant") %>% 
  select(-Types)

There are 457 OTUs are conditionally rare/dominant.

Check the relative abundance of these conditionally rare/dominant at genus level

# add taxonomy info - Genera
df <- transform(merge(taxonomy[, -c(1:5)], df, by = "row.names"), row.names = Row.names, Row.names = NULL)  

df <- df %>% select(-Species)

df <- aggregate(df[,-1], list(df$Genus), sum)
row.names(df) <- df$Group.1
df <- df[, -1]
df <- 100*df/rarefactiondepth

# remove these rows
row.names.remove <- c("", "uncultured", "uncultured soil bacterium")

df <- df[!(row.names(df) %in% row.names.remove), ]

# genus kept for plotting
row.names(df)
##  [1] "Acidibacter"                              
##  [2] "Actinomadura"                             
##  [3] "ADurb.Bin063-1"                           
##  [4] "Aeromicrobium"                            
##  [5] "Alicyclobacillus"                         
##  [6] "Ammoniphilus"                             
##  [7] "Anaeromyxobacter"                         
##  [8] "Bacillus"                                 
##  [9] "Blastocatella"                            
## [10] "Bradyrhizobium"                           
## [11] "Brevibacillus"                            
## [12] "Bryobacter"                               
## [13] "Candidatus Solibacter"                    
## [14] "Candidatus Udaeobacter"                   
## [15] "Chlorobi bacterium OLB5"                  
## [16] "Chryseobacterium"                         
## [17] "Clostridium sensu stricto 8"              
## [18] "Cohnella"                                 
## [19] "Conexibacter"                             
## [20] "Crossiella"                               
## [21] "Deinococcus"                              
## [22] "Dongia"                                   
## [23] "Dyadobacter"                              
## [24] "Ellin6067"                                
## [25] "Exiguobacterium"                          
## [26] "Frankia"                                  
## [27] "Gaiella"                                  
## [28] "Gemmata"                                  
## [29] "Gemmatirosa"                              
## [30] "Geodermatophilus"                         
## [31] "Labrys"                                   
## [32] "Luteolibacter"                            
## [33] "Massilia"                                 
## [34] "metagenome"                               
## [35] "Microbacterium"                           
## [36] "Microbispora"                             
## [37] "Microlunatus"                             
## [38] "Micromonospora"                           
## [39] "Microvirga"                               
## [40] "MND1"                                     
## [41] "Mycobacterium"                            
## [42] "Nibrella"                                 
## [43] "Nitrospira"                               
## [44] "Nocardia"                                 
## [45] "Nocardioides"                             
## [46] "Nonomuraea"                               
## [47] "Nordella"                                 
## [48] "Paenibacillus"                            
## [49] "Pantoea"                                  
## [50] "Parviterribacter"                         
## [51] "Pedomicrobium"                            
## [52] "Pirellula"                                
## [53] "Pseudonocardia"                           
## [54] "RB41"                                     
## [55] "Reyranella"                               
## [56] "Rubrobacter"                              
## [57] "Shimazuella"                              
## [58] "Singulisphaera"                           
## [59] "Solirubrobacter"                          
## [60] "Sphingobacterium"                         
## [61] "Sphingomonas"                             
## [62] "Stenotrophomonas"                         
## [63] "Streptomyces"                             
## [64] "Subgroup 10"                              
## [65] "Tellurimicrobium"                         
## [66] "Terrimonas"                               
## [67] "Tumebacillus"                             
## [68] "uncultured Aciditerrimonas sp."           
## [69] "uncultured Acidobacteriales bacterium"    
## [70] "uncultured actinobacterium"               
## [71] "uncultured bacterium"                     
## [72] "uncultured beta proteobacterium"          
## [73] "uncultured Chloroflexi bacterium"         
## [74] "uncultured delta proteobacterium"         
## [75] "uncultured Desulfuromonadales bacterium"  
## [76] "uncultured Firmicutes bacterium"          
## [77] "uncultured Gaiella sp."                   
## [78] "uncultured Gram-positive bacterium"       
## [79] "uncultured Holophagae bacterium"          
## [80] "uncultured organism"                      
## [81] "uncultured Pedosphaera sp."               
## [82] "uncultured proteobacterium"               
## [83] "uncultured Rubrobacterales bacterium"     
## [84] "uncultured Rubrobacteria bacterium"       
## [85] "uncultured Solirubrobacter sp."           
## [86] "uncultured Syntrophobacteraceae bacterium"
df <- as.data.frame(t(df))

# prepare a meta data file
group_info <- data.frame(row.names = rownames(df), t(as.data.frame(strsplit(as.character(row.names(df)), "_"))))

# remove last letter in the third column
group_info$X2 <- gsub('.{1}$', '', group_info$X2)

# combine metadata to the dominant phyla table
df <- data.frame(Treat = as.factor(group_info[,1]),
                 Day = as.factor(group_info[,2]),
                 df)

# write.csv(df, "conditionally_rare_dominant_genera.csv")

#  pivot data frame from wide to long
df1 <- df %>%
  pivot_longer(cols = 3:ncol(df), 
               names_to = "Genus", 
               values_to = "value") 

head(df1)
## # A tibble: 6 × 4
##   Treat Day   Genus            value
##   <fct> <fct> <chr>            <dbl>
## 1 T1    0     Acidibacter      0    
## 2 T1    0     Actinomadura     0    
## 3 T1    0     ADurb.Bin063.1   0    
## 4 T1    0     Aeromicrobium    0    
## 5 T1    0     Alicyclobacillus 0    
## 6 T1    0     Ammoniphilus     0.290
df1 <- df1 %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0 ton CTS/ha", "2.5 ton CTS/ha", "5 ton CTS/ha", "10 ton CTS/ha", "20 ton CTS/ha"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) 
# %>% 
#   group_by(Treat, Day, Genus) %>% 
#   summarise(avg = mean(value, na.rm=T)) 

pd <- position_dodge(0.2)  

(p <- ggplot(df1, aes(x = Day, y = value, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_wrap(~Genus, nrow = 11, scales = "free_y") +
    labs(title = "Conditionally rare/dominant genera", x = "Day", y = "Relative abundance (%)") +
    mytheme + 
    theme(legend.position = c(0.8, 0.03)) + 
    theme(strip.text = element_text(face = "italic")))

# ggsave("genus_conditionally_rare_dominant.pdf", width = 32, height = 30, units = "cm", p, scale = 1.5)

df2 <- df1 %>% filter(
  Genus %in% c("Nocardioides", "Nordella", "Parviterribacter", "Rubrobacter", "Tellurimicrobium",
  "uncultured.actinobacterium", "uncultured.Gaiella.sp.", "uncultured.Solirubrobacter.sp."))

df2
## # A tibble: 592 × 4
##    Treat        Day   Genus                           value
##    <fct>        <fct> <chr>                           <dbl>
##  1 0 ton CTS/ha 0     Nocardioides                   0     
##  2 0 ton CTS/ha 0     Nordella                       0     
##  3 0 ton CTS/ha 0     Parviterribacter               0     
##  4 0 ton CTS/ha 0     Rubrobacter                    0     
##  5 0 ton CTS/ha 0     Tellurimicrobium               0     
##  6 0 ton CTS/ha 0     uncultured.actinobacterium     0     
##  7 0 ton CTS/ha 0     uncultured.Gaiella.sp.         0     
##  8 0 ton CTS/ha 0     uncultured.Solirubrobacter.sp. 0.131 
##  9 0 ton CTS/ha 0     Nocardioides                   0.448 
## 10 0 ton CTS/ha 0     Nordella                       0.0828
## # … with 582 more rows
(p2 <- ggplot(df2, aes(x = Day, y = value, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 3, position = pd) +
    scale_color_brewer(palette="PuBu", name = "CTS concentration") +
    facet_wrap(~Genus, nrow = 4, scales = "free_y") +
    labs(title = "", x = "Day", y = "Relative abundance (%)") +
    mytheme + 
    theme(strip.text = element_text(face = "italic")))

# ggsave("Figure_4.pdf", width = 10, height = 12, units = "cm", p2, scale = 1.5)
# ggsave("Figure_4.jpg", width = 10, height = 12, units = "cm", p2, scale = 1.5)


# another way to show the results

#  pivot data frame from wide to long
df1 <- df %>%
  pivot_longer(cols = 3:ncol(df), 
               names_to = "Genus", 
               values_to = "value") 

head(df1)
## # A tibble: 6 × 4
##   Treat Day   Genus            value
##   <fct> <fct> <chr>            <dbl>
## 1 T1    0     Acidibacter      0    
## 2 T1    0     Actinomadura     0    
## 3 T1    0     ADurb.Bin063.1   0    
## 4 T1    0     Aeromicrobium    0    
## 5 T1    0     Alicyclobacillus 0    
## 6 T1    0     Ammoniphilus     0.290
df1 <- df1 %>%
  mutate(Treat = factor(Treat, levels = c("T1", "T2", "T3", "T4", "T5"), labels = c("0", "2.5", "5", "10", "20"))) %>% 
  mutate(Day = factor(Day, levels = c("0", "45", "75", "150", "180"))) 

df2 <- df1 %>% filter(
  Genus %in% c("Nocardioides", "Nordella", "Parviterribacter", "Rubrobacter", "Tellurimicrobium",
  "uncultured.actinobacterium", "uncultured.Gaiella.sp.", "uncultured.Solirubrobacter.sp."))

df2
## # A tibble: 592 × 4
##    Treat Day   Genus                           value
##    <fct> <fct> <chr>                           <dbl>
##  1 0     0     Nocardioides                   0     
##  2 0     0     Nordella                       0     
##  3 0     0     Parviterribacter               0     
##  4 0     0     Rubrobacter                    0     
##  5 0     0     Tellurimicrobium               0     
##  6 0     0     uncultured.actinobacterium     0     
##  7 0     0     uncultured.Gaiella.sp.         0     
##  8 0     0     uncultured.Solirubrobacter.sp. 0.131 
##  9 0     0     Nocardioides                   0.448 
## 10 0     0     Nordella                       0.0828
## # … with 582 more rows
# %>% 
#   group_by(Treat, Day, Genus) %>% 
#   summarise(avg = mean(value, na.rm=T)) 

(p3 <- ggplot(df2, aes(x = Treat, y = value, group = Treat, colour = Treat)) + 
    stat_summary(fun.data = "mean_se", geom = "errorbar", width = 0.2, alpha = 0.5, position = pd) +
    stat_summary(fun = "mean", geom = "line", alpha = 0.5, position = pd) + 
    stat_summary(fun = "mean", geom = "point", size = 3, alpha = 0.8, position = pd) +
    scale_color_brewer(palette="PuBu") +
    guides(col = FALSE) +
    facet_wrap(~Genus, nrow = 4, scales = "free_y") +
    labs(title = "", x = "CTS concentration (ton CTS/ha)", y = "Relative abundance (%)") +
    mytheme + 
    theme(strip.text = element_text(face = "italic")))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?

# ggsave("Figure_4_new.pdf", width = 8, height = 12, units = "cm", p3, scale = 1.5)
# ggsave("Figure_4_new.jpg", width = 8, height = 12, units = "cm", p3, scale = 1.5)

Session Info

sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggpmisc_0.5.1      ggpp_0.4.5         ggpubr_0.5.0       RColorBrewer_1.1-3
##  [5] ape_5.6-2          vegan_2.6-4        lattice_0.20-45    permute_0.9-7     
##  [9] forcats_0.5.2      stringr_1.4.1      dplyr_1.0.10       purrr_0.3.5       
## [13] readr_2.1.3        tidyr_1.2.1        tibble_3.1.8       ggplot2_3.4.0     
## [17] tidyverse_1.3.2   
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-157        fs_1.5.2            lubridate_1.8.0    
##  [4] httr_1.4.4          tools_4.2.1         backports_1.4.1    
##  [7] bslib_0.4.0         utf8_1.2.2          R6_2.5.1           
## [10] DBI_1.1.3           mgcv_1.8-40         colorspace_2.0-3   
## [13] withr_2.5.0         gridExtra_2.3       tidyselect_1.2.0   
## [16] compiler_4.2.1      cli_3.4.1           rvest_1.0.3        
## [19] quantreg_5.94       SparseM_1.81        xml2_1.3.3         
## [22] labeling_0.4.2      sass_0.4.2          scales_1.2.1       
## [25] digest_0.6.30       rmarkdown_2.17      pkgconfig_2.0.3    
## [28] htmltools_0.5.3     dbplyr_2.2.1        fastmap_1.1.0      
## [31] highr_0.9           rlang_1.0.6         readxl_1.4.1       
## [34] rstudioapi_0.14     farver_2.1.1        jquerylib_0.1.4    
## [37] generics_0.1.3      jsonlite_1.8.2      car_3.1-1          
## [40] googlesheets4_1.0.1 confintr_0.2.0      magrittr_2.0.3     
## [43] polynom_1.4-1       Matrix_1.5-3        Rcpp_1.0.9         
## [46] munsell_0.5.0       fansi_1.0.3         abind_1.4-5        
## [49] lifecycle_1.0.3     stringi_1.7.8       yaml_2.3.6         
## [52] carData_3.0-5       MASS_7.3-57         grid_4.2.1         
## [55] parallel_4.2.1      crayon_1.5.2        cowplot_1.1.1      
## [58] haven_2.5.1         splines_4.2.1       hms_1.1.2          
## [61] knitr_1.40          pillar_1.8.1        boot_1.3-28        
## [64] ggsignif_0.6.4      reprex_2.0.2        glue_1.6.2         
## [67] evaluate_0.17       modelr_0.1.9        vctrs_0.5.0        
## [70] tzdb_0.3.0          MatrixModels_0.5-1  cellranger_1.1.0   
## [73] gtable_0.3.1        assertthat_0.2.1    cachem_1.0.6       
## [76] xfun_0.34           broom_1.0.1         rstatix_0.7.1      
## [79] survival_3.3-1      googledrive_2.0.0   gargle_1.2.1       
## [82] cluster_2.1.3       ellipsis_0.3.2